This is an analysis of differential expression at the transcript level between shedding rate groupings based on average shedding rates for infected blue-winged teal ileum samples.

## Load packages and data
library(limma)
library(edgeR)
library(Glimma)
library(gplots)
library(ggrepel)
library(RColorBrewer)
library(clusterProfiler)
library(gridExtra)
library(kableExtra)
library(topGO)
library(tidyverse)

## Load data
annot <- read.delim("G:/Shared drives/RNA Seq Supershedder Project/BWTE DE manuscript/extData/Trinotate.csv", header = TRUE, sep = "\t")
cnt <- read.table("G:/Shared drives/RNA Seq Supershedder Project/BWTE DE manuscript/extData/rsem.isoform.counts.matrix", header = TRUE)
covars <- read.csv("G:/Shared drives/RNA Seq Supershedder Project/BWTE DE manuscript/extData/BWTE54_SSgroup_Raw_Pool.csv", header = TRUE)

Clean data

Bird 36 is missing a measurement for the weight covariate and bird 19 had improper tissue-based grouping, suggesting a potential sample mix up. They are both removed from the analysis. We also rename the transcripts, dropping the “Trinity_” prefix.

cnt <- cnt %>%
  select(
    -alignEstimateAbundance_BWTE_Ileum_36_S50,
    -alignEstimateAbundance_BWTE_Bursa_36_S31,-alignEstimateAbundance_BWTE_Ileum_19_S35,
    -alignEstimateAbundance_BWTE_Bursa_19_S14) %>%
  rownames_to_column("transcript") %>%
  separate(transcript, into = c(NA, "pt1", "pt2", "gene", "isoform")) %>%
  unite(transcript, pt1, pt2, gene, isoform) %>%
  column_to_rownames("transcript") %>% 
  select(contains("Ileum"))

covars <- covars %>%
  filter(!bird %in% c("36", "19")) %>%
  arrange(bird) %>%
  mutate(group = str_remove(group, "-"))

annot <- annot %>%
  separate(transcript_id, into = c(NA, "pt1", "pt2", "gene", "isoform")) %>%
  unite(transcript_id, pt1, pt2, gene, isoform)

Set up model effects

bird <- as.factor(covars$bird)
sex <- as.factor(covars$sex)
age <- as.numeric(covars$age)
weight <- covars$wt_55
group <- recode(covars$group, C1 = "Ctl", C14 = "Ctl") %>% 
  as.factor()
pool <- as.factor(covars$Pool.Ileum)
ssgroup.virus.avg <- as.factor(covars$SSgroup.virus.avg)

covars.tib <- data.frame(bird,
                    sex,
                    age,
                    weight,
                    group,
                    pool,
                    ssgroup.virus.avg) %>% 
  as_tibble()

Prep data for analysis

Here we standardize expression levels across samples of varying depth by using the counts per million (CPM) and trimmed mean of M-values (TMM) methods. We also remove any transcripts that are expressed at few than 0.5 CPM in at least 25% of individuals.

#Convert to DGEList object
dge <- DGEList(counts=cnt)
dge$genes <- annot[match(rownames(dge$counts), annot$transcript_id),]

#CPM and log-CPM
cpm <- cpm(dge)
lcpm <- cpm(dge, log = TRUE)

#### Retain only transcripts expressed at >0.5 CPM in at least 25% of individuals
table(rowSums(dge$counts==0) == length(cnt[1,]))
## 
##  FALSE   TRUE 
## 484888  86217
keep.exprs <- rowSums(cpm>0.5) >= length(cnt[1,])/4
sum(keep.exprs)
## [1] 66980
dge <- dge[keep.exprs,, keep.lib.sizes=FALSE]
table(rowSums(dge$counts==0) == length(cnt[1,]))
## 
## FALSE 
## 66980
dim(dge)
## [1] 66980    52
#TMM normalization
dge <- calcNormFactors(dge, method = "TMM")

Subset data

Following normalization, we subset the data based on infection groups.

#Assign groups in DGElist
dge$samples$group <- covars.tib$group

### Divide out I5
dge.I5 <- dge[,grep('I5', dge$samples$group)]

covars.I5 <- covars.tib %>% filter(group == "I5")

Differential expression analysis

## Establish design matrix: I5
design.I5 = model.matrix(~ 0 +
                        factor(covars.I5$ssgroup.virus.avg) +
                        covars.I5$age +
                        covars.I5$sex + 
                        covars.I5$weight + 
                        covars.I5$pool)

colnames(design.I5) <- c("HIGH", "LOW", "MODERATE",
                         "age", "sexM", "weight", "pool3")


contr.matrix.I5 = makeContrasts(
  LvM = LOW - MODERATE,
  MvH = MODERATE - HIGH,
  LvH = LOW - HIGH,
  levels = colnames(design.I5))

## Mean-variance trend plots
v.I5 <- voomWithQualityWeights(dge.I5, design.I5, plot = FALSE)

#Fitting models
vfit.I5 <- lmFit(v.I5, design.I5) 
vfit.I5 <- contrasts.fit(vfit.I5, contrasts = contr.matrix.I5)
tfit.I5 <- treat(vfit.I5, lfc = 1.0)

## Identify DE genes
dt.I5 <- decideTests(tfit.I5, p.value = 0.1, adjust.method = "fdr")
dt.tib.I5 <- as_tibble(dt.I5, rownames = NA) %>% 
  rownames_to_column("transcript") %>% 
  mutate_at(vars(starts_with("L")), as.numeric) %>% 
  mutate_at(vars(starts_with("M")), as.numeric) %>% 
  rename(LvM.I5 = LvM,
         MvH.I5 = MvH,
         LvH.I5 = LvH)

dt.tib <- dt.tib.I5 %>% 
  mutate_at(vars(starts_with("L")), as.numeric) %>% 
  mutate_at(vars(starts_with("M")), as.numeric) %>% 
  filter_at(vars(LvM.I5:LvH.I5), any_vars(. != 0))

Numbers of DE genes

allResults <- as.data.frame(summary(dt.I5)) %>% 
  filter(Var1 != "NotSig")

kable(allResults) %>%
  kable_styling("striped", full_width = F)
Var1 Var2 Freq
Down LvM 0
Up LvM 0
Down MvH 1
Up MvH 38
Down LvH 1
Up LvH 102

Volcano plot

tmp1 <- topTreat(tfit.I5, coef = 1, n = Inf)
results1 <- mutate(tmp1, sig=ifelse(tmp1$adj.P.Val<0.1, "Sig", "Not Sig"))
p1 <- ggplot(results1, aes(logFC, -log10(P.Value))) +
  geom_point(aes(col = sig)) +
  scale_color_manual(values=c("black", "red")) +
  ggtitle("Low vs. Moderate shedding") +
  ylab("-Log10(Adjusted p-value)") +
  xlab("Log(Fold count)") +
  theme_bw() +
  theme(legend.position = "none")


tmp2 <- topTreat(tfit.I5, coef = 2, n = Inf)
results2 <- mutate(tmp2, sig=ifelse(tmp2$adj.P.Val<0.1, "Sig", "Not Sig"))
p2 <- ggplot(results2, aes(logFC, -log10(P.Value))) +
  geom_point(aes(col = sig)) +
  scale_color_manual(values=c("black", "red")) +
  ggtitle("Moderate vs. High shedding") +
  ylab("-Log10(Adjusted p-value)") +
  xlab("Log(Fold count)") +
  theme_bw() +
  theme(legend.position = "none")

tmp3 <- topTreat(tfit.I5, coef = 3, n = Inf)
results3 <- mutate(tmp3, sig=ifelse(tmp3$adj.P.Val<0.1, "Sig", "Not Sig"))
p3 <- ggplot(results3, aes(logFC, -log10(P.Value))) +
  geom_point(aes(col = sig)) +
  scale_color_manual(values=c("black", "red")) +
  ggtitle("Low vs. High shedding") +
  ylab("-Log10(Adjusted p-value)") +
  xlab("Log(Fold count)") +
  theme_bw() +
  theme(legend.position = "none")

grid.arrange(p1, p2, p3, nrow = 1)

Heatmap

# Get the gene names for DE genes
lcpm2 <- as_tibble(dge.I5$counts) %>% 
  cpm(., log = TRUE)

newNames <- colnames(lcpm2) %>% 
  as_tibble() %>% 
  separate(value, into = c(NA, NA, NA, "sample", NA)) %>% 
  bind_cols(covars.I5) %>% 
  unite("newNames", ssgroup.virus.avg, group, bird)

colOrder <- bind_cols(covars.I5) %>% 
  mutate(ssgroup.virus.avg = fct_relevel(ssgroup.virus.avg, "LOW", "MODERATE", "HIGH")) %>%
  arrange(ssgroup.virus.avg, group, bird) %>% 
  unite("newNames", ssgroup.virus.avg, group, bird)

colnames(lcpm2) <- newNames$newNames
lcpm2 <- lcpm2[, colOrder$newNames]

heatmap.annot <- annot %>%
  select(
    transcript_id,
    sprot_Top_BLASTX_hit) %>% 
  filter(transcript_id %in% dt.tib$transcript) %>% 
  separate(sprot_Top_BLASTX_hit, into = c("sprot1", NA, NA, NA, NA, "sprot2", NA), "\\^") %>% 
  separate(sprot2, sep = "=", into = c(NA, "SwissProt_GeneFunction")) %>% 
  separate(sprot1, sep = "_", into = c("SwissProt_GeneName", NA)) %>% 
  distinct(transcript_id, SwissProt_GeneName) %>% 
  filter(SwissProt_GeneName != ".")

deGeneCounts <- lcpm2 %>% 
  as_tibble() %>% 
  mutate(transcript = rownames(dge$counts)) %>% 
  filter(transcript %in% dt.tib$transcript) %>%
  rename(transcript_id = transcript) %>% 
  left_join(heatmap.annot) %>% 
  mutate(SwissProt_GeneName = replace_na(SwissProt_GeneName, ".")) %>% 
  unite(transcript, transcript_id:SwissProt_GeneName, sep = " - ") %>% 
  column_to_rownames("transcript") %>% 
  as.matrix()

## Set up palette
mypalette <- brewer.pal(11,"RdYlBu")
morecols <- colorRampPalette(mypalette)

hc <- hclust(as.dist(1-cor(t(deGeneCounts))))

# Plot the heatmap
heatmap.2(deGeneCounts,
          Colv = FALSE,
          Rowv = as.dendrogram(hc),
          col = rev(morecols(50)),
          trace = "none",
          colsep = c(5, 10),
          dendrogram = "row",
          density.info = "none",
          key = TRUE,
          #main = "Ileum - Gene level",
          margins = c(10, 10),
          scale ="row")

Boxplots for differentially expressed transcripts

lcpm.DE <- lcpm %>%
  as_tibble(rownames = NA) %>% 
  rownames_to_column("identifier") %>% 
  filter(identifier %in% dt.tib$transcript) %>%
  pivot_longer(cols = contains("_"),
               names_to = "sample",
               values_to = "lcpm") %>%
  separate(sample, into = c(NA, NA, "tissue", "bird", NA)) %>%
  mutate(bird = as.integer(bird)) %>%
  left_join(covars, by = "bird") %>%
  filter(group == "C1" | group == "C14" | group == "I5") %>% 
  select(identifier, bird, lcpm, SSgroup.virus.avg, group) %>% 
  mutate(group = recode(group, C14 = "Control"),
       group = recode(group, C1 = "Control"),
       tmpID = ifelse(group == "Control", 'Control', 'Infected'))


aprioriPlotting <- function(target, ...) {
  annot.target <- annot %>%
    select(
      transcript_id,
      sprot_Top_BLASTX_hit) %>%
    filter(transcript_id == target) %>% 
    separate(sprot_Top_BLASTX_hit, into = c("sprot1", NA, NA, NA, NA, NA, NA), "\\^") %>% 
    separate(sprot1, sep = "_", into = c("SwissProt_GeneName", NA))
  
  plot <- lcpm.DE %>%
    filter(identifier == target) %>%
    mutate(SSgroup.virus.avg = fct_relevel(SSgroup.virus.avg, "LOW", "MODERATE", "HIGH")) %>%
    ggplot(aes(x = SSgroup.virus.avg, y = lcpm, fill = factor(group))) +
    facet_grid(. ~ tmpID, scales = "free", space = "free") +
    ylab("Log2(Counts per million)") +
    xlab("Shedding Group") +
    scale_fill_grey(start = 0.35, end = 1) + 
    geom_point(position = position_dodge(width=0.75), aes(group = group), show.legend = FALSE) +
    geom_boxplot(alpha = 0.5) +
    geom_label_repel(aes(label = bird, group = group, fill = NULL), position = position_dodge(width=0.75), show.legend=FALSE) +
    theme_classic() +
    labs(title= paste0(target, " - ", annot.target[1,2])) +
    theme(legend.title = element_blank())
  
  print(plot)
}

for(trans in sort(unique(lcpm.DE$identifier))) {
  aprioriPlotting(trans)
}

Annotations for differentially expressed transcripts

annot %>%
  select(
    transcript_id,
    sprot_Top_BLASTX_hit
  ) %>%
  rename(transcript = transcript_id) %>% 
  inner_join(dt.tib) %>% 
  separate(sprot_Top_BLASTX_hit, into = c("sprot1", NA, NA, NA, NA, "sprot2", NA), "\\^") %>% 
  separate(sprot2, sep = "=", into = c(NA, "SwissProt_GeneFunction")) %>% 
  separate(sprot1, sep = "_", into = c("SwissProt_GeneName", NA)) %>% 
  mutate_at(.vars = vars(LvM.I5:LvH.I5),
            .funs = ~ ifelse(. == 1, "up", .)) %>% 
  mutate_at(.vars = vars(LvM.I5:LvH.I5),
            .funs = ~ ifelse(. == -1, "down", .)) %>% 
  filter_at(vars(LvM.I5:LvH.I5), any_vars(. != 0)) %>% 
  distinct(transcript, .keep_all = TRUE) %>% 
  select(-SwissProt_GeneFunction) %>% 
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
transcript SwissProt_GeneName LvM.I5 MvH.I5 LvH.I5
DN89_c3_g1_i2 MEIS1 0 up up
DN11629_c0_g1_i9 . 0 0 up
DN10879_c1_g1_i8 RLF 0 0 up
DN2043_c0_g1_i3 KBRS2 0 0 up
DN2036_c0_g1_i4 MBD2 0 0 up
DN430_c0_g1_i5 GLT12 0 0 up
DN7842_c0_g1_i8 ALG13 0 up up
DN1146_c0_g1_i3 MD12L 0 0 up
DN1128_c0_g1_i5 RCC1 0 up up
DN6928_c0_g1_i5 NAIF1 0 0 up
DN6089_c0_g2_i9 . 0 up up
DN6099_c0_g1_i3 SC22A 0 up up
DN15503_c0_g1_i2 POL 0 up up
DN15518_c0_g1_i8 SCNNB 0 up up
DN885_c0_g1_i1 BUD31 0 0 up
DN5108_c0_g1_i1 TBC13 0 0 up
DN5157_c1_g1_i10 . 0 0 up
DN4235_c0_g1_i1 SIM13 0 up up
DN4247_c0_g1_i13 DPOG2 0 up up
DN5456_c0_g1_i2 C560 0 up up
DN3676_c0_g1_i10 CNOT6 0 0 up
DN2785_c0_g1_i2 UBA5 0 0 up
DN8539_c0_g1_i1 CHAC1 0 0 up
DN7628_c0_g1_i2 DIEXF 0 up up
DN207_c1_g1_i14 IQEC1 0 0 up
DN4008_c0_g1_i8 PTPRK 0 0 up
DN663_c0_g1_i4 AGRG5 0 0 up
DN682_c0_g3_i4 RRAGA 0 0 up
DN613_c2_g1_i7 MRVI1 0 0 up
DN603_c0_g1_i9 TERT 0 0 up
DN3137_c0_g1_i9 S2536 0 0 up
DN7924_c0_g1_i6 CAMP1 0 up up
DN18178_c0_g1_i3 PMGE 0 0 up
DN10078_c1_g1_i12 PARN 0 0 up
DN16556_c0_g1_i2 BICRL 0 0 up
DN5255_c0_g2_i6 TMCC1 0 0 up
DN4309_c0_g1_i7 PCSK6 0 0 up
DN2519_c0_g1_i4 . 0 0 up
DN12268_c0_g1_i8 KYNU 0 up up
DN10663_c0_g1_i2 SIR5 0 up up
DN2832_c0_g1_i12 . 0 0 up
DN1982_c0_g1_i3 . 0 0 up
DN1956_c0_g1_i7 LPIN1 0 0 up
DN25185_c0_g1_i5 . 0 0 up
DN6804_c0_g1_i7 . 0 0 up
DN6804_c0_g1_i1 DKC1 0 0 up
DN6850_c1_g1_i3 ELMO2 0 up up
DN22798_c0_g1_i8 SNX17 0 0 up
DN9924_c0_g2_i7 . 0 up up
DN2376_c1_g1_i6 AT8B2 0 up up
DN2387_c1_g1_i3 . 0 up up
DN2333_c0_g1_i2 F169A 0 0 up
DN2333_c1_g1_i1 F135A 0 up up
DN2397_c1_g1_i4 IRAK4 0 up 0
DN1407_c5_g1_i2 . 0 down 0
DN1433_c0_g1_i11 ARI1B 0 0 up
DN1449_c0_g1_i12 CBX1 0 0 up
DN6364_c0_g1_i4 PLGT2 0 up up
DN10398_c0_g1_i3 RHG29 0 0 up
DN3594_c0_g1_i2 . 0 up up
DN6658_c2_g1_i10 . 0 0 up
DN6616_c2_g1_i17 CCN2 0 0 up
DN5702_c0_g1_i6 LMBL3 0 0 up
DN5796_c0_g1_i8 . 0 0 up
DN143_c2_g1_i2 . 0 0 up
DN133_c1_g1_i1 LRWD1 0 up up
DN154_c3_g1_i4 . 0 0 up
DN21574_c0_g1_i1 . 0 up up
DN21599_c0_g1_i6 MNS1 0 up up
DN11783_c0_g3_i1 . 0 0 up
DN9768_c0_g1_i7 CCNE2 0 0 up
DN9743_c0_g1_i21 CXXC1 0 0 up
DN3085_c1_g1_i6 . 0 0 up
DN3018_c0_g1_i18 KHK 0 up up
DN3084_c0_g1_i13 . 0 0 up
DN3044_c1_g1_i4 ANR26 0 up up
DN582_c0_g1_i1 RAD21 0 up up
DN2129_c0_g1_i12 OGFD2 0 0 up
DN10956_c0_g1_i1 MRC1 0 0 up
DN1264_c0_g1_i2 REEP2 0 0 up
DN1230_c0_g1_i13 CFLAR 0 0 up
DN6130_c0_g1_i8 GCN1 0 up up
DN3358_c1_g1_i14 TRPS1 0 0 up
DN3362_c0_g1_i6 . 0 up 0
DN2447_c4_g1_i3 . 0 0 up
DN2431_c0_g1_i15 . 0 0 down
DN7377_c0_g1_i8 ATAD3 0 0 up
DN6423_c0_g1_i4 IFT74 0 0 up
DN10599_c0_g1_i1 PEDF 0 up up
DN1083_c2_g1_i11 . 0 0 up
DN1044_c0_g1_i3 ZN318 0 up up
DN327_c0_g1_i6 MSD2 0 0 up
DN308_c0_g1_i5 ZCHC2 0 0 up
DN5860_c0_g1_i2 PACR 0 up up
DN4911_c0_g1_i8 SETMR 0 up 0
DN5091_c0_g2_i6 ATL2 0 up up
DN751_c2_g3_i2 . 0 0 up
DN745_c0_g1_i22 SEPT7 0 0 up
DN9837_c0_g1_i9 HOP2 0 0 up
DN2291_c0_g1_i9 MCAT 0 0 up
DN2224_c0_g1_i1 . 0 up up
DN15912_c0_g1_i10 . 0 0 up
DN32038_c0_g1_i1 . 0 0 up
DN9334_c0_g1_i4 CARM1 0 up up
DN8480_c0_g2_i2 ARI5B 0 up up
DN14071_c0_g1_i5 . 0 0 up
DN130272_c0_g3_i2 CPTP 0 0 up

Transcript functions

annot %>%
  select(
    transcript_id,
    sprot_Top_BLASTX_hit,
    Kegg) %>%
  rename(transcript = transcript_id) %>% 
  inner_join(dt.tib) %>% 
  separate(sprot_Top_BLASTX_hit, into = c("sprot1", NA, NA, NA, NA, "sprot2", NA), "\\^") %>% 
  separate(sprot2, sep = "=", into = c(NA, "SwissProt_GeneFunction")) %>% 
  separate(sprot1, sep = "_", into = c("SwissProt_GeneName", NA)) %>% 
  separate(Kegg, into = c("KEGG_ID", "KO_ID"), sep = "\\`") %>% 
  distinct(transcript, SwissProt_GeneName, KEGG_ID, .keep_all = TRUE) %>% 
  select(transcript, SwissProt_GeneName, SwissProt_GeneFunction, KEGG_ID, KO_ID) %>% 
  filter(SwissProt_GeneName != ".") %>% 
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
transcript SwissProt_GeneName SwissProt_GeneFunction KEGG_ID KO_ID
DN89_c3_g1_i2 MEIS1 Homeobox protein Meis1; KEGG:mmu:17268 KO:K15613
DN10879_c1_g1_i8 RLF Zinc finger protein Rlf; KEGG:hsa:6018 NA
DN2043_c0_g1_i3 KBRS2 NF-kappa-B inhibitor-interacting Ras-like protein 2; KEGG:gga:420032 KO:K17197
DN2036_c0_g1_i4 MBD2 Methyl-CpG-binding domain protein 2 {ECO:0000305}; KEGG:hsa:8932 KO:K11590
DN430_c0_g1_i5 GLT12 Polypeptide N-acetylgalactosaminyltransferase 12; KEGG:hsa:79695 KO:K00710
DN7842_c0_g1_i8 ALG13 UDP-N-acetylglucosamine transferase subunit ALG13 homolog; KEGG:rno:300284 KO:K07432
DN1146_c0_g1_i3 MD12L Mediator of RNA polymerase II transcription subunit 12-like protein; KEGG:hsa:116931 KO:K15162
DN1128_c0_g1_i5 RCC1 Regulator of chromosome condensation; KEGG:hsa:1104 KO:K11493
DN6928_c0_g1_i5 NAIF1 Nuclear apoptosis-inducing factor 1; . NA
DN6099_c0_g1_i3 SC22A Vesicle-trafficking protein SEC22a; KEGG:hsa:26984 KO:K08520
DN15503_c0_g1_i2 POL Gag-Pol polyprotein; KEGG:vg:1491877 NA
DN15518_c0_g1_i8 SCNNB Amiloride-sensitive sodium channel subunit beta; . NA
DN885_c0_g1_i1 BUD31 Protein BUD31 homolog; KEGG:rno:89819 KO:K12873
DN5108_c0_g1_i1 TBC13 TBC1 domain family member 13; KEGG:hsa:54662 NA
DN4235_c0_g1_i1 SIM13 Small integral membrane protein 13; KEGG:hsa:221710 NA
DN4247_c0_g1_i13 DPOG2 DNA polymerase subunit gamma-2, mitochondrial; KEGG:bta:505152 KO:K02333
DN4247_c0_g1_i13 DPOG2 DNA polymerase subunit gamma-2, mitochondrial; KEGG:hsa:11232 KO:K02333
DN5456_c0_g1_i2 C560 Succinate dehydrogenase cytochrome b560 subunit, mitochondrial; KEGG:ssc:100524676 KO:K00236
DN3676_c0_g1_i10 CNOT6 CCR4-NOT transcription complex subunit 6; KEGG:rno:287249 KO:K12603
DN3676_c0_g1_i10 CNOT6 CCR4-NOT transcription complex subunit 6; KEGG:xla:100505426 KO:K12603
DN2785_c0_g1_i2 UBA5 Ubiquitin-like modifier-activating enzyme 5; KEGG:gga:414879 KO:K12164
DN8539_c0_g1_i1 CHAC1 Glutathione-specific gamma-glutamylcyclotransferase 1 {ECO:0000303|PubMed:27913623}; KEGG:hsa:79094 KO:K07232
DN7628_c0_g1_i2 DIEXF Digestive organ expansion factor homolog; KEGG:gga:421384 KO:K14774
DN207_c1_g1_i14 IQEC1 IQ motif and SEC7 domain-containing protein 1; KEGG:hsa:9922 KO:K12495
DN4008_c0_g1_i8 PTPRK Receptor-type tyrosine-protein phosphatase kappa; KEGG:hsa:5796 KO:K06776
DN4008_c0_g1_i8 PTPRK Receptor-type tyrosine-protein phosphatase kappa; KEGG:hsa:5797 KO:K05693
DN663_c0_g1_i4 AGRG5 Adhesion G-protein coupled receptor G5; KEGG:mmu:382045 KO:K08459
DN682_c0_g3_i4 RRAGA Ras-related GTP-binding protein A {ECO:0000305}; KEGG:rno:117044 KO:K16185
DN613_c2_g1_i7 MRVI1 Protein MRVI1; KEGG:hsa:10335 KO:K12337
DN603_c0_g1_i9 TERT Telomerase reverse transcriptase; KEGG:cfa:403412 KO:K11126
DN3137_c0_g1_i9 S2536 Solute carrier family 25 member 36; KEGG:gga:424817 KO:K15116
DN7924_c0_g1_i6 CAMP1 Calmodulin-regulated spectrin-associated protein 1; KEGG:hsa:157922 KO:K17493
DN18178_c0_g1_i3 PMGE Bisphosphoglycerate mutase; KEGG:hsa:669 KO:K01837
DN10078_c1_g1_i12 PARN Poly(A)-specific ribonuclease PARN; KEGG:hsa:5073 KO:K01148
DN16556_c0_g1_i2 BICRL BRD4-interacting chromatin-remodeling complex-associated protein-like {ECO:0000250|UniProtKB:Q6AI39}; KEGG:mmu:210982 NA
DN5255_c0_g2_i6 TMCC1 Transmembrane and coiled-coil domains protein 1 {ECO:0000250|UniProtKB:O94876}; KEGG:mmu:330401 NA
DN4309_c0_g1_i7 PCSK6 Proprotein convertase subtilisin/kexin type 6; KEGG:hsa:5046 KO:K08672
DN12268_c0_g1_i8 KYNU Kynureninase {ECO:0000255|HAMAP-Rule:MF_03017}; KEGG:hsa:8942 KO:K01556
DN10663_c0_g1_i2 SIR5 NAD-dependent protein deacylase sirtuin-5, mitochondrial {ECO:0000255|HAMAP-Rule:MF_03160}; KEGG:gga:420834 KO:K11415
DN1956_c0_g1_i7 LPIN1 Phosphatidate phosphatase LPIN1; KEGG:mmu:14245 KO:K15728
DN6804_c0_g1_i1 DKC1 H/ACA ribonucleoprotein complex subunit DKC1; KEGG:gga:422196 KO:K11131
DN6850_c1_g1_i3 ELMO2 Engulfment and cell motility protein 2; KEGG:hsa:63916 KO:K18985
DN6850_c1_g1_i3 ELMO2 Engulfment and cell motility protein 2; KEGG:mmu:228875 KO:K15280
DN22798_c0_g1_i8 SNX17 Sorting nexin-17; KEGG:dre:568263 KO:K17929
DN22798_c0_g1_i8 SNX17 Sorting nexin-17; KEGG:mmu:266781 KO:K17929
DN2376_c1_g1_i6 AT8B2 Phospholipid-transporting ATPase ID; KEGG:hsa:57198 KO:K01530
DN2333_c0_g1_i2 F169A Soluble lamin-associated protein of 75 kDa; KEGG:hsa:26049 NA
DN2333_c1_g1_i1 F135A Protein FAM135A; KEGG:hsa:57579 NA
DN2397_c1_g1_i4 IRAK4 Interleukin-1 receptor-associated kinase 4; KEGG:hsa:51135 KO:K04733
DN2397_c1_g1_i4 IRAK4 Interleukin-1 receptor-associated kinase 4; KEGG:hsa:83448 KO:K06176
DN2397_c1_g1_i4 IRAK4 Interleukin-1 receptor-associated kinase 4; KEGG:mmu:266632 KO:K04733
DN1433_c0_g1_i11 ARI1B AT-rich interactive domain-containing protein 1B; KEGG:hsa:57492 KO:K11653
DN1433_c0_g1_i11 ARI1B AT-rich interactive domain-containing protein 1B; KEGG:mmu:239985 KO:K11653
DN1449_c0_g1_i12 CBX1 Chromobox protein homolog 1; KEGG:mmu:12412 KO:K11585
DN6364_c0_g1_i4 PLGT2 Protein O-glucosyltransferase 2 {ECO:0000305|PubMed:30127001}; KEGG:hsa:79070 NA
DN10398_c0_g1_i3 RHG29 Rho GTPase-activating protein 29; KEGG:hsa:9411 KO:K20644
DN10398_c0_g1_i3 RHG29 Rho GTPase-activating protein 29; KEGG:dre:378998 KO:K20644
DN6616_c2_g1_i17 CCN2 CCN family member 2; KEGG:mmu:14219 KO:K06827
DN5702_c0_g1_i6 LMBL3 Lethal(3)malignant brain tumor-like protein 3; KEGG:hsa:84456 NA
DN133_c1_g1_i1 LRWD1 Leucine-rich repeat and WD repeat-containing protein 1; KEGG:xtr:100145159 NA
DN133_c1_g1_i1 LRWD1 Leucine-rich repeat and WD repeat-containing protein 1; KEGG:xla:444544 NA
DN21599_c0_g1_i6 MNS1 Meiosis-specific nuclear structural protein 1; KEGG:mmu:17427 NA
DN9768_c0_g1_i7 CCNE2 G1/S-specific cyclin-E2; KEGG:hsa:9134 KO:K06626
DN9743_c0_g1_i21 CXXC1 CXXC-type zinc finger protein 1; KEGG:bta:511446 KO:K14960
DN9743_c0_g1_i21 CXXC1 CXXC-type zinc finger protein 1; KEGG:hsa:30827 KO:K14960
DN3018_c0_g1_i18 KHK Ketohexokinase {ECO:0000312|RGD:2966}; KEGG:hsa:3795 KO:K00846
DN3018_c0_g1_i18 KHK Ketohexokinase {ECO:0000312|RGD:2966}; KEGG:rno:25659 KO:K00846
DN3018_c0_g1_i18 KHK Ketohexokinase {ECO:0000312|RGD:2966}; KEGG:hsa:284001 NA
DN3044_c1_g1_i4 ANR26 Ankyrin repeat domain-containing protein 26 {ECO:0000305}; KEGG:hsa:22852 NA
DN582_c0_g1_i1 RAD21 Double-strand-break repair protein rad21 homolog; KEGG:bta:540966 KO:K06670
DN2129_c0_g1_i12 OGFD2 2-oxoglutarate and iron-dependent oxygenase domain-containing protein 2; KEGG:dre:790923 NA
DN10956_c0_g1_i1 MRC1 Macrophage mannose receptor 1; KEGG:hsa:4360 KO:K06560
DN1264_c0_g1_i2 REEP2 Receptor expression-enhancing protein 2; KEGG:hsa:51308 KO:K17338
DN1264_c0_g1_i2 REEP2 Receptor expression-enhancing protein 2; KEGG:mmu:225362 KO:K17338
DN1230_c0_g1_i13 CFLAR CASP8 and FADD-like apoptosis regulator; KEGG:hsa:8837 KO:K04724
DN6130_c0_g1_i8 GCN1 eIF-2-alpha kinase activator GCN1 {ECO:0000250|UniProtKB:E9PVA8}; KEGG:hsa:10985 NA
DN3358_c1_g1_i14 TRPS1 Zinc finger transcription factor Trps1; KEGG:hsa:7227 KO:K22040
DN7377_c0_g1_i8 ATAD3 ATPase family AAA domain-containing protein 3; KEGG:bta:784353 KO:K17681
DN7377_c0_g1_i8 ATAD3 ATPase family AAA domain-containing protein 3; KEGG:xla:398759 KO:K17681
DN7377_c0_g1_i8 ATAD3 ATPase family AAA domain-containing protein 3; KEGG:xtr:407895 KO:K17681
DN6423_c0_g1_i4 IFT74 Intraflagellar transport protein 74 homolog; KEGG:mmu:67694 KO:K19679
DN10599_c0_g1_i1 PEDF Pigment epithelium-derived factor; KEGG:mmu:20317 KO:K19614
DN1044_c0_g1_i3 ZN318 Zinc finger protein 318; KEGG:hsa:24149 NA
DN327_c0_g1_i6 MSD2 Myb/SANT-like DNA-binding domain-containing protein 2; KEGG:gga:768763 NA
DN308_c0_g1_i5 ZCHC2 Zinc finger CCHC domain-containing protein 2; KEGG:hsa:54877 KO:K22700
DN5860_c0_g1_i2 PACR Pituitary adenylate cyclase-activating polypeptide type I receptor; KEGG:bta:319095 KO:K04587
DN4911_c0_g1_i8 SETMR Histone-lysine N-methyltransferase SETMAR {ECO:0000305}; KEGG:mmu:74729 KO:K11433
DN5091_c0_g2_i6 ATL2 ADAMTS-like protein 2; KEGG:hsa:9719 NA
DN745_c0_g1_i22 SEPT7 Septin-7; KEGG:pon:100173884 KO:K16944
DN9837_c0_g1_i9 HOP2 Homologous-pairing protein 2 homolog; KEGG:hsa:29893 KO:K06695
DN2291_c0_g1_i9 MCAT Mitochondrial carnitine/acylcarnitine carrier protein; KEGG:mcf:102124681 KO:K15109
DN2291_c0_g1_i9 MCAT Mitochondrial carnitine/acylcarnitine carrier protein; KEGG:mmu:57279 KO:K15109
DN9334_c0_g1_i4 CARM1 Histone-arginine methyltransferase CARM1; KEGG:hsa:10498 KO:K05931
DN8480_c0_g2_i2 ARI5B AT-rich interactive domain-containing protein 5B; KEGG:gga:423661 NA
DN130272_c0_g3_i2 CPTP Ceramide-1-phosphate transfer protein; KEGG:xla:735049 NA

Gene Ontology Enrichment Analysis

topGO.mappings <- readMappings("G:/Shared drives/RNA Seq Supershedder Project/BWTE DE manuscript/extData/GOdbTrans.txt", sep = "\t", IDsep = ",")

goEnrich <- function(targetComp, ...) {
  DE.results <- dt.tib %>% filter(!!sym(targetComp) != 0)
  
  all.genes <- sort(unique(as.character(rownames(dge$counts))))
  int.genes <- DE.results$transcript
  int.genes <- factor(as.integer(all.genes %in% int.genes))
  names(int.genes) = all.genes
  
  go.obj.BP <- new("topGOdata", ontology='BP',
              allGenes = int.genes,
              annot = annFUN.gene2GO,
              nodeSize = 5,
              gene2GO = topGO.mappings)

  go.obj.MF <- new("topGOdata", ontology='MF',
                 allGenes = int.genes,
                 annot = annFUN.gene2GO,
                 nodeSize = 5,
                 gene2GO = topGO.mappings)

  go.obj.CC <- new("topGOdata", ontology='CC',
                 allGenes = int.genes,
                 annot = annFUN.gene2GO,
                 nodeSize = 5,
                 gene2GO = topGO.mappings)
  
  results.BP <- runTest(go.obj.BP, algorithm = "elim", statistic = "fisher")

  results.tab.BP <- GenTable(object = go.obj.BP, 
                          elimFisher = results.BP, 
                          topNodes = 50) %>% 
    as_tibble() %>% 
    mutate(pVal = as.numeric(elimFisher)) %>% 
    filter(pVal < 0.01) %>% 
    mutate(Domain = "BP") %>% 
    mutate(Comparison = targetComp)
  
  results.MF <- runTest(go.obj.MF, algorithm = "elim", statistic = "fisher")
  
  results.tab.MF <- GenTable(object = go.obj.MF, 
                          elimFisher = results.MF, 
                          topNodes = 50) %>% 
    as_tibble() %>% 
    mutate(pVal = as.numeric(elimFisher)) %>% 
    filter(pVal < 0.01) %>% 
    mutate(Domain = "MF") %>% 
    mutate(Comparison = targetComp)
  
  results.CC <- runTest(go.obj.CC, algorithm = "elim", statistic = "fisher")
  
  results.tab.CC <- GenTable(object = go.obj.CC, 
                          elimFisher = results.CC, 
                          topNodes = 50) %>% 
    as_tibble() %>% 
    mutate(pVal = as.numeric(elimFisher)) %>% 
    filter(pVal < 0.01) %>% 
    mutate(Domain = "CC") %>% 
    mutate(Comparison = targetComp)
  
  rbind(results.tab.BP,
    results.tab.MF,
    results.tab.CC)
}

results <- list()

for(z in unique(names(dt.tib)[-1])) {
  if (dt.tib %>% 
      filter(!!sym(z) != 0) %>% 
      nrow(.) > 0)
  results[[length(results)+1]] <- goEnrich(z)
}

results.tib <- bind_rows(results, .id = "column_label") %>%
  select(-column_label) %>% 
  arrange(GO.ID, Comparison)

kable(results.tib) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
GO.ID Term Annotated Significant Expected elimFisher pVal Domain Comparison
GO:0000002 mitochondrial genome maintenance 87 2 0.14 0.00927 0.009270 BP LvH.I5
GO:0000262 mitochondrial chromosome 9 1 0.01 0.0059 0.005900 CC MvH.I5
GO:0000333 telomerase catalytic core complex 5 1 0.01 0.0081 0.008100 CC LvH.I5
GO:0000495 box H/ACA snoRNA 3’-end processing 14 2 0.02 0.00024 0.000240 BP LvH.I5
GO:0000794 condensed nuclear chromosome 259 3 0.42 0.0088 0.008800 CC LvH.I5
GO:0003420 regulation of growth plate cartilage cho… 14 1 0.01 0.0098 0.009800 BP MvH.I5
GO:0003721 telomerase RNA reverse transcriptase act… 5 1 0.01 0.0085 0.008500 MF LvH.I5
GO:0003887 DNA-directed DNA polymerase activity 212 2 0.15 0.0097 0.009700 MF MvH.I5
GO:0004454 ketohexokinase activity 9 1 0.01 0.0063 0.006300 MF MvH.I5
GO:0004535 poly(A)-specific ribonuclease activity 40 2 0.07 0.0021 0.002100 MF LvH.I5
GO:0004999 vasoactive intestinal polypeptide recept… 8 1 0.01 0.0056 0.005600 MF MvH.I5
GO:0005087 Ran guanyl-nucleotide exchange factor ac… 8 1 0.01 0.0056 0.005600 MF MvH.I5
GO:0005721 pericentric heterochromatin 65 2 0.11 0.0051 0.005100 CC LvH.I5
GO:0007004 telomere maintenance via telomerase 255 3 0.42 0.00886 0.008860 BP LvH.I5
GO:0007354 zygotic determination of anterior/poster… 5 1 0.01 0.00827 0.008270 BP LvH.I5
GO:0007568 aging 740 5 1.23 0.00776 0.007760 BP LvH.I5
GO:0008013 beta-catenin binding 260 3 0.44 0.0099 0.009900 MF LvH.I5
GO:0008327 methyl-CpG binding 50 2 0.08 0.0033 0.003300 MF LvH.I5
GO:0015786 UDP-glucose transmembrane transport 12 1 0.01 0.0084 0.008400 BP MvH.I5
GO:0016571 histone methylation 495 4 0.82 0.00933 0.009330 BP LvH.I5
GO:0016571 histone methylation 495 3 0.35 0.0049 0.004900 BP MvH.I5
GO:0016823 hydrolase activity, acting on acid carbo… 6 1 0.00 0.0042 0.004200 MF MvH.I5
GO:0019441 tryptophan catabolic process to kynureni… 10 1 0.01 0.0070 0.007000 BP MvH.I5
GO:0019442 tryptophan catabolic process to acetyl-C… 6 1 0.01 0.00991 0.009910 BP LvH.I5
GO:0019442 tryptophan catabolic process to acetyl-C… 6 1 0.00 0.0042 0.004200 BP MvH.I5
GO:0019805 quinolinate biosynthetic process 8 1 0.01 0.0056 0.005600 BP MvH.I5
GO:0022904 respiratory electron transport chain 202 2 0.14 0.0088 0.008800 BP MvH.I5
GO:0030374 nuclear receptor transcription coactivat… 238 3 0.40 0.0078 0.007800 MF LvH.I5
GO:0031514 motile cilium 261 3 0.42 0.0089 0.008900 CC LvH.I5
GO:0032902 nerve growth factor production 6 1 0.01 0.00991 0.009910 BP LvH.I5
GO:0034354 ‘de novo’ NAD biosynthetic process from … 10 1 0.01 0.0070 0.007000 BP MvH.I5
GO:0034645 cellular macromolecule biosynthetic proc… 11790 31 19.56 0.00835 0.008350 BP LvH.I5
GO:0043199 sulfate binding 14 1 0.01 0.0098 0.009800 MF MvH.I5
GO:0043420 anthranilate metabolic process 6 1 0.01 0.00991 0.009910 BP LvH.I5
GO:0043420 anthranilate metabolic process 6 1 0.00 0.0042 0.004200 BP MvH.I5
GO:0044030 regulation of DNA methylation 48 2 0.08 0.00291 0.002910 BP LvH.I5
GO:0051091 positive regulation of DNA-binding trans… 607 3 0.43 0.0087 0.008700 BP MvH.I5
GO:0051321 meiotic cell cycle 659 6 1.09 0.00080 0.000800 BP LvH.I5
GO:0061035 regulation of cartilage development 185 3 0.31 0.00366 0.003660 BP LvH.I5
GO:0061515 myeloid cell development 206 3 0.34 0.00494 0.004940 BP LvH.I5
GO:0061624 fructose catabolic process to hydroxyace… 13 1 0.01 0.0091 0.009100 BP MvH.I5
GO:0070034 telomerase RNA binding 81 2 0.14 0.0084 0.008400 MF LvH.I5
GO:0071168 protein localization to chromatin 99 3 0.16 0.00061 0.000610 BP LvH.I5
GO:0071168 protein localization to chromatin 99 3 0.07 4.7e-05 0.000047 BP MvH.I5
GO:0071279 cellular response to cobalt ion 12 1 0.01 0.0084 0.008400 BP MvH.I5
GO:0071549 cellular response to dexamethasone stimu… 88 2 0.15 0.00947 0.009470 BP LvH.I5
GO:0097053 L-kynurenine catabolic process 9 1 0.01 0.0063 0.006300 BP MvH.I5
GO:0097750 endosome membrane tubulation 6 1 0.01 0.00991 0.009910 BP LvH.I5
GO:0140285 endosome fission 6 1 0.01 0.00991 0.009910 BP LvH.I5
GO:1903506 regulation of nucleic acid-templated tra… 7535 21 12.50 0.00909 0.009090 BP LvH.I5